library(dplyr)
library(lme4)
library(texreg)
library(ggplot2)
library(questionr)
library(tidyr)
library(psych)
library(forcats)


# 2015 -----------

# Read the 2015 Selects post-election survey (Stata file).
data <- readstata13::read.dta13(
  "726_Selects2015_PES_Data_v1.03.dta",
  nonint.factors = TRUE
)

# Drop respondents whose party choice is "Don't know, no answer"; rows where
# the comparison evaluates to NA are dropped as well.
data <- data[which(data$f11800main7 != "Don't know, no answer"), ]

# Derive the analysis variables from the raw survey items. as.numeric() on the
# survey columns is used to work with their underlying codes.
dat <- data %>%
  mutate(
    # Party choice as a factor of the numeric codes; relabelled further below.
    pvote = as.factor(as.numeric(f11800main7)),
    educ = educ_r,
    # Keep codes 1-3 of f12025, everything else becomes NA.
    mw_list = ifelse(as.numeric(f12025) < 4, as.numeric(f12025), NA),
    # Centre the retained codes so they run from -1 to 1.
    women_list = mw_list - 2,
    # Codes 1 and 3 of f12026 -> 0, code 2 -> 1, anything else -> NA.
    mw_qualf = case_when(
      as.numeric(f12026) %in% c(1, 3) ~ 0,
      as.numeric(f12026) %in% 2 ~ 1
    ),
    # NOTE(review): age is measured relative to 2021, not the 2015 survey
    # year -- confirm this is intended.
    age = ifelse(birthyear > -9, 2021 - birthyear, NA),
    age2 = age^2,
    female = as.numeric(gender) - 1
  ) %>%
  select(
    female, age, age2, canton, pvote, educ,
    f12025, mw_list, women_list, f12026, mw_qualf
  )

table(dat$educ)

# Collapse the education codes into five ordered groups.
dat$educ_recode <- factor(
  case_when(
    as.numeric(dat$educ) %in% c(1:3) ~ "compulsory or lower",
    as.numeric(dat$educ) %in% c(4, 5) ~ "vocational",
    as.numeric(dat$educ) %in% c(6:9) ~ "secondary",
    as.numeric(dat$educ) %in% c(10:11) ~ "higher vocational",
    as.numeric(dat$educ) %in% c(12:13) ~ "university"
  ),
  levels = c("compulsory or lower", "vocational", "secondary", "higher vocational", "university")
)

table(data$f11800main7)
table(dat$pvote)

# Relabel the party codes; the labels are assigned positionally, in the order
# of the numeric codes that are present in the data.
levels(dat$pvote) <- c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS", "Other")


# Include list positions ---
# Candidate file for 2015: compute, per canton and party, the share of
# candidate rows recorded as female. The result (ls) is merged onto the survey
# data further below by party choice and canton.

# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 as
# well; with factors, the ifelse()/case_when() fallbacks below would return
# integer codes instead of the party and canton names.
list <- read.csv("NRW2015-kandidierende.csv", sep = ";",
                 stringsAsFactors = FALSE)

ldat <- list %>%
  mutate(
    kt = kanton_bezeichnung,
    # Collapse the listed parties into a single "Other" category.
    party = as.factor(ifelse(
      partei_bezeichnung_de %in% c("CSP", "EDU", "EVP", "Lega", "MCR", "PdA/Sol.", "SD", "Übrige"),
      "Other", partei_bezeichnung_de
    )),
    lplace = kandidat_nummer,
    gew = flag_gewaehlt
  ) %>%
  select(kt, party, lplace, geschlecht, gew)

# Count candidates per canton x party x gender, then express each count as a
# share of all candidates of that party in that canton. Rows with an empty
# gender still count towards the denominator and are removed only afterwards.
lsum <- ldat %>%
  group_by(kt, party, geschlecht) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  group_by(kt, party) %>%
  mutate(total_party = sum(total), share = total / total_party) %>%
  ungroup() %>%
  filter(geschlecht != "")

# One row per canton x party with one share column per gender value.
ls <- lsum %>%
  select(kt, party, geschlecht, share) %>%
  spread(key = "geschlecht", value = "share")
# NOTE(review): positional renaming assumes exactly two gender columns with the
# female one first -- confirm against the gender codes used in the CSV.
names(ls) <- c("kt", "pvote", "female", "male")

ls <- ls %>%
  mutate(
    # A party without any female candidate has no female share; treat as 0.
    sharef = ifelse(is.na(female), 0, female),
    # Shorten the multilingual canton labels; other labels are kept as they are.
    canton = case_when(
      kt == "Fribourg / Freiburg" ~ "Fribourg",
      kt == "Bern / Berne" ~ "Bern",
      kt == "Graubünden / Grigioni / Grischun" ~ "Graubünden",
      kt == "Valais / Wallis" ~ "Valais",
      TRUE ~ kt
    )
  ) %>%
  select(pvote, sharef, canton, kt)

# Same level order as the survey's party factor; party labels outside this set
# become NA.
ls$pvote <- factor(ls$pvote, levels = c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS", "Other"))

# merge
# Attach the party-by-canton share of women (ls) to the survey respondents.
dat <- dat %>% arrange(canton, pvote)
ls <- ls %>% arrange(canton, pvote)
dat$canton <- as.character(dat$canton)
# all.x = TRUE keeps respondents that have no matching row in ls (their sharef
# is NA). TRUE is spelled out because T is an ordinary, reassignable variable.
dat15 <- merge(dat, ls, by = c("pvote", "canton"), all.x = TRUE)



# _Model 1 ------------

# Survey item: "After you changed the list, in the end there are ..."
# (more women = 1, as many women as men / more men = 0)
# NOTE(review): women_list is built as mw_list - 2 from codes 1-3, so it can
# take three values (-1, 0, 1) rather than the 0/1 coding described above --
# confirm which description is current.

table(dat15$women_list)
table(dat15$f12025)

# Complete cases on the model variables.
dat1 <- na.omit(
  select(dat15, women_list, female, age, age2, educ_recode, sharef, pvote, canton)
)

# Linear model with party and canton fixed effects.
mod1 <- lm(
  women_list ~ female + age + age2 + educ_recode + sharef + pvote + canton,
  data = dat1
)

screenreg(list(mod1), omit.coef = "canton")

# Descriptives (Table A5)
library(xtable)
# Columns 2-5, 8 and 9 of the describe() output.
xtable(describe(dat1)[c(2:5, 8:9)])


# _Model 2 ------------

# Survey item: "If you had to choose between two equally qualified candidates,
# would you rather ..." (vote for the woman = 1, vote for the man = 0)

table(dat$mw_qualf)
table(data$f12026)

# Complete cases on the model variables.
dat2 <- na.omit(
  select(dat15, mw_qualf, female, age, age2, educ_recode, sharef, pvote, canton)
)

# Logistic regression with party and canton fixed effects.
mod2 <- glm(
  mw_qualf ~ female + age + age2 + educ_recode + sharef + pvote + canton,
  family = binomial(link = "logit"),
  data = dat2
)

screenreg(list(mod1, mod2))


# Descriptives (Table A6)
xtable(describe(dat2)[c(2:5, 8:9)])

# Clear the 2015 working objects before the 2019 section reuses the names.
rm(list = c("data", "dat", "ldat", "list", "ls", "lsum"))


# 2019 -----------

# Read the 2019 Selects post-election survey (Stata file) and derive the same
# analysis variables as in the 2015 section.
data <- readstata13::read.dta13("Selects2019_PES_v1.0_all codes.dta",
                                nonint.factors = TRUE)

dat <- data %>% 
  # Party choice: respondents whose f11800rec is the BDP label get "BDP";
  # "Don't know ..." becomes NA; everyone else keeps the integer code of
  # f11800main6 (ifelse() on a factor returns its codes, not its labels).
  # The result is a character vector that is turned back into a factor.
  mutate(pvote = as.factor(f11800main6),
         pvote = ifelse(f11800rec=="BDP/PBD - Conservative Democratic Party", "BDP", 
                        ifelse(pvote == "Don't know, no answer, other answer", NA, pvote)),
         pvote = as.factor(pvote),
         educ = f21310,
         # Keep codes 4-6 of f12025 and centre them so they run from -1 to 1.
         mw_list = as.numeric(f12025),
         mw_list = ifelse(mw_list %in%c(4:6), mw_list, NA),
         women_list = mw_list-5,
         # Codes 4 and 6 of f12026 -> 0, code 5 -> 1, anything else -> NA.
         mw_qualf = as.numeric(f12026),
         mw_qualf = ifelse(mw_qualf%in%c(4,6),0,ifelse(mw_qualf==5,1,NA)),
         canton = f10000,
         # NOTE(review): the offset of 4 presumably skips leading missing-value
         # levels of sex -- confirm against levels(data$sex).
         female = as.numeric(sex)-4,
         # NOTE(review): age is measured relative to 2021, not the survey year.
         age = ifelse(birthyear>-9, 2021 - birthyear,NA),
         age2 = age^2) %>% 
  select(female,age, age2, canton, pvote, educ,f12025,mw_list,women_list,f12026,mw_qualf,
         f11800main6)

table(data$f11800main6)
table(dat$pvote)
# Labels are assigned positionally to the alphabetically sorted levels of the
# character codes, with "BDP" last.
# NOTE(review): assumes exactly seven numeric codes remain and that they sort
# in party order -- verify with the table(dat$pvote) output above.
levels(dat$pvote) <- c("SVP", "FDP",  "CVP", "GLP", "SP", "GPS","Other","BDP")


# Reorder the levels to match the 2015 party factor.
dat$pvote <- factor(dat$pvote, levels = c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS","Other"))


table(dat$educ)
levels(dat$educ)

# Collapse education into the same five groups as in 2015. The codes are
# shifted by 3 before grouping.
# NOTE(review): the shift presumably skips three leading missing-value levels
# of f21310 -- confirm against the levels(dat$educ) output above.
dat <- dat %>%  mutate(
  educ_recode = as.numeric(educ)-3,
  educ_recode = case_when(educ_recode %in% c(1:3) ~ "compulsory or lower",
                          educ_recode %in% c(4,5) ~ "vocational",
                          educ_recode %in% c(6:9) ~ "secondary",
                          educ_recode %in% c(10:11) ~ "higher vocational",
                          educ_recode %in% c(12:13) ~ "university"),
  educ_recode = as.factor(educ_recode))

# Put the education groups in substantive rather than alphabetical order.
dat$educ_recode <- factor(dat$educ_recode, levels = c("compulsory or lower","vocational", "secondary","higher vocational","university"))


# Include list positions ---
# Candidate file for 2019: compute, per canton and party, the share of
# candidate rows recorded as female. The result (ls) is merged onto the survey
# data further below by party choice and canton.

# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 as
# well; with factors, the ifelse()/case_when() fallbacks below would return
# integer codes instead of the party and canton names.
list <- read.csv("NRW2019-kandidierende.csv", sep = ";",
                 stringsAsFactors = FALSE)

ldat <- list %>%
  mutate(
    kt = kanton_bezeichnung,
    # Collapse the listed parties into a single "Other" category.
    party = as.factor(ifelse(
      partei_bezeichnung_de %in% c("CSP", "EDU", "EVP", "Lega",
                                   "MCR", "PdA/Sol.", "SD", "Übrige", "FGA", "LPS"),
      "Other", partei_bezeichnung_de
    )),
    lplace = kandidat_nummer,
    gew = flag_gewaehlt
  ) %>%
  select(kt, party, lplace, geschlecht, gew)

# Count candidates per canton x party x gender, then express each count as a
# share of all candidates of that party in that canton. Rows with an empty
# gender still count towards the denominator and are removed only afterwards.
lsum <- ldat %>%
  group_by(kt, party, geschlecht) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  group_by(kt, party) %>%
  mutate(total_party = sum(total), share = total / total_party) %>%
  ungroup() %>%
  filter(geschlecht != "")

ls <- lsum %>%
  select(kt, party, geschlecht, share) %>%
  spread(key = "geschlecht", value = "share") %>%
  # `F` here is the spread column holding the share for gender value "F".
  rename(sharef = F, pvote = party) %>%
  mutate(
    # A party without any female candidate has no "F" share; treat as 0.
    sharef = ifelse(is.na(sharef), 0, sharef),
    # Map the candidate file's canton labels onto the labels used in the
    # survey; other labels are kept as they are.
    canton = case_when(
      kt == "Fribourg / Freiburg" ~ "Fribourg",
      kt == "Bern / Berne" ~ "Bern",
      kt == "Graubünden / Grigioni / Grischun" ~ "Graubünden",
      kt == "Valais / Wallis" ~ "Valais",
      kt == "Genève" ~ "Geneva",
      kt == "Luzern" ~ "Lucerne",
      kt == "Zürich" ~ "Zurich",
      TRUE ~ kt
    )
  ) %>%
  select(pvote, sharef, canton, kt)

# Same level order as the survey's party factor; party labels outside this set
# become NA.
ls$pvote <- factor(ls$pvote, levels = c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS", "Other"))

# merge
# Attach the party-by-canton share of women (ls) to the survey respondents.
dat$canton <- as.character(dat$canton)
str(ls$canton)

dat <- dat %>% arrange(canton, pvote)
ls <- ls %>% arrange(canton, pvote)

# all.x = TRUE keeps respondents that have no matching row in ls (their sharef
# is NA). TRUE is spelled out because T is an ordinary, reassignable variable.
dat19 <- merge(dat, ls, by = c("pvote", "canton"), all.x = TRUE)



# _Model 3 ------------

# Survey item: "After you changed the list, in the end there are ..."
# (more women = 1, as many women as men / more men = 0)
# NOTE(review): women_list is built as mw_list - 5 from codes 4-6, so it can
# take three values (-1, 0, 1) rather than the 0/1 coding described above --
# confirm which description is current.

table(dat19$women_list)
table(dat19$f12025)

# Complete cases on the model variables.
dat3 <- dat19 %>%
  select(women_list, female, age, age2, educ_recode, sharef, pvote, canton) %>%
  na.omit()

# Fit on dat3 (not dat19) so the model and the Table A7 descriptives are based
# on the same data frame, as Models 1 and 2 do with dat1/dat2. The estimation
# sample is unchanged: lm() drops the same incomplete rows by default.
mod3 <- lm(women_list ~ female + age + age2 + educ_recode + sharef +
             pvote + canton, data = dat3)

screenreg(list(mod3))

# Descriptives (Table A7)
library(xtable)
# Columns 2-5, 8 and 9 of the describe() output.
xtable(describe(dat3)[c(2, 3, 4, 5, 8, 9)])


# _Model 4 ------------

# Survey item: "If you had to choose between two equally qualified candidates,
# would you rather ..." (vote for the woman = 1, vote for the man = 0)

table(dat19$mw_qualf)
table(dat19$f12026)

# Complete cases on the model variables.
dat4 <- dat19 %>%
  select(mw_qualf, female, age, age2, educ_recode, sharef, pvote, canton) %>%
  na.omit()

# Fit on dat4 (not dat19) so the model and the Table A8 descriptives are based
# on the same data frame. The estimation sample is unchanged: glm() drops the
# same incomplete rows by default.
mod4 <- glm(mw_qualf ~ female + age + age2 + educ_recode + sharef +
              pvote + canton, data = dat4, family = binomial(link = "logit"))

screenreg(list(mod4))

# Descriptives (Table A8)
library(xtable)
# Describe the Model 4 estimation sample. The previous describe(dat19)
# summarised the full merged frame, i.e. different columns and a different N
# than the model it is meant to document (compare Tables A5-A7).
xtable(describe(dat4)[c(2, 3, 4, 5, 8, 9)])


# Combined regression table for the four per-year models: first as LaTeX
# (texreg), then the same table on the console (screenreg). Canton
# coefficients are omitted from the display.
table_models <- list(mod1, mod3, mod2, mod4)
table_coef_names <- c(
  "(Intercept)",
  "Female",
  "Age",
  "Age squared",
  "Education: vocational",
  "Education: secondary",
  "Education: higher vocational",
  "Education: university",
  "Share of women on list",
  "FDP",
  "BDP",
  "CVP",
  "GLP",
  "SP",
  "GPS",
  "Other"
)
table_model_names <- c("2015 list", "2019 list", "2015 qualif", "2019 qualif")

texreg(
  table_models,
  omit.coef = "canton",
  custom.coef.names = table_coef_names,
  custom.model.names = table_model_names
)


screenreg(
  table_models,
  omit.coef = "canton",
  custom.coef.names = table_coef_names,
  custom.model.names = table_model_names
)


# Clear the 2019 working objects and the table helpers defined above.
rm(data, dat, ldat, list, ls, lsum)
rm(table_models, table_coef_names, table_model_names)


# Share of women on list ----------

# Distribution of the list share of women in the Model 1 sample.
ggplot(data = dat1, mapping = aes(x = sharef)) +
  geom_histogram()


# Combined dataset ---------------------------

# Stack the 2015 and 2019 respondents; year is 1 for 2015 and 2 for 2019.
combdat1 <- dat15 %>%
  select(women_list, mw_qualf, female, age, age2, educ_recode, sharef, pvote, canton) %>%
  mutate(year = 1)
combdat2 <- dat19 %>%
  select(women_list, mw_qualf, female, age, age2, educ_recode, sharef, pvote, canton) %>%
  mutate(year = 2)

combdat <- rbind(combdat1, combdat2)
combdat$year <- as.factor(combdat$year)

# _Model 5 -----
# Pooled linear model: party effects are allowed to differ by year.
cdat1 <- na.omit(
  select(combdat, women_list, female, age, age2, educ_recode, sharef, pvote, canton, year)
)

mod5 <- lm(
  women_list ~ female + age + age2 + educ_recode + sharef + pvote * year + canton,
  data = cdat1
)

screenreg(list(mod5), omit.coef = "canton")

# _Model 6 ----
# Pooled logistic model with the same party-by-year interaction.

cdat2 <- na.omit(
  select(combdat, mw_qualf, female, age, age2, educ_recode, sharef, pvote, canton, year)
)
mod6 <- glm(
  mw_qualf ~ female + age + age2 + educ_recode + sharef + pvote * year + canton,
  family = binomial(link = "logit"),
  data = cdat2
)

screenreg(list(mod6), omit.coef = "canton")


# Plots ---------------
library(effects)

# _Figure 2 ----
# Effect estimates for every party-by-year combination from the pooled linear
# model (mod5), with their lower/upper bounds as error bars.
eff1 <- as.data.frame(allEffects(mod5))$`pvote:year`
eff1$pvote <- factor(as.character(eff1$pvote),
                     levels = c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS", "Other"))
eff1$year <- as.factor(as.character(eff1$year))
levels(eff1$year) <- c("2015", "2019")

pdf(file = "Figure2.pdf", paper = "special", width = 6, height = 4)
# print() is required for the plot to reach the PDF device when the script is
# run through source(); a bare ggplot object is only drawn when auto-printed
# at the interactive top level.
print(
  ggplot(eff1, aes(pvote, fit, color = year)) +
    geom_hline(yintercept = 0, color = "grey", linetype = "dotted") +
    geom_point(position = position_dodge(0.3)) +
    geom_errorbar(aes(ymin = lower, ymax = upper),
                  position = position_dodge(0.3), width = 0.15) +
    labs(x = " ",
         y = "More men . . . . . . . . more women") +
    ylim(-1, 1) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_colour_grey(end = 0.65)
)
dev.off()

# _Figure 3----
# Same display for the pooled logistic model (mod6); the dotted reference line
# is drawn at 0.5.
eff1 <- as.data.frame(allEffects(mod6))$`pvote:year`
eff1$pvote <- factor(as.character(eff1$pvote),
                     levels = c("SVP", "FDP", "BDP", "CVP", "GLP", "SP", "GPS", "Other"))
eff1$year <- as.factor(as.character(eff1$year))
levels(eff1$year) <- c("2015", "2019")

pdf(file = "Figure3.pdf", paper = "special", width = 6, height = 4)
print(
  ggplot(eff1, aes(pvote, fit, color = year)) +
    geom_hline(yintercept = 0.5, color = "grey", linetype = "dotted") +
    geom_point(position = position_dodge(0.3)) +
    geom_errorbar(aes(ymin = lower, ymax = upper),
                  position = position_dodge(0.3), width = 0.15) +
    labs(x = " ",
         y = "Predicted Probability to choose women") +
    ylim(0, 1) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_colour_grey(end = 0.65)
)
dev.off()


# Ordinal logistic regression (Table A9) ---------------------
# Re-estimates the list-composition models with the outcome treated as an
# ordered category (cut points -1|0 and 0|1) instead of a numeric score.

# Attach MASS without its select(): otherwise MASS::select masks
# dplyr::select, and re-running this script in the same session fails at the
# first select() call because library(dplyr) is then a no-op.
library(MASS, exclude = "select")

dat15$women_list1 <- as.factor(dat15$women_list)

# Hess = TRUE stores the Hessian so that standard errors can be computed.
mod1a <- polr(women_list1 ~ female + age + educ_recode + sharef +
                pvote + canton, Hess = TRUE, data = dat15)

dat19$women_list1 <- as.factor(dat19$women_list)

mod1b <- polr(women_list1 ~ female + age + educ_recode + sharef +
                pvote + canton, Hess = TRUE, data = dat19)


screenreg(list(mod1a, mod1b), omit.coef = "canton")

texreg(list(mod1a, mod1b),
       omit.coef = "canton",
       custom.coef.names = c(
                             "Female",
                             "Age",
                             "Education: vocational",
                             "Education: secondary",
                             "Education: higher vocational",
                             "Education: university",
                             "Share of women on list",
                             "FDP",
                             "BDP",
                             "CVP",
                             "GLP",
                             "SP",
                             "GPS",
                             "Other",
                             "-1|0", "0|1"),
       custom.model.names = c("2015", "2019"))

